{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build a Web App using a Regression model to learn about UFO sighting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>datetime</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>shape</th>\n",
       "      <th>duration (seconds)</th>\n",
       "      <th>duration (hours/min)</th>\n",
       "      <th>comments</th>\n",
       "      <th>date posted</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10/10/1949 20:30</td>\n",
       "      <td>san marcos</td>\n",
       "      <td>tx</td>\n",
       "      <td>us</td>\n",
       "      <td>cylinder</td>\n",
       "      <td>2700.0</td>\n",
       "      <td>45 minutes</td>\n",
       "      <td>This event took place in early fall around 194...</td>\n",
       "      <td>4/27/2004</td>\n",
       "      <td>29.883056</td>\n",
       "      <td>-97.941111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>10/10/1949 21:00</td>\n",
       "      <td>lackland afb</td>\n",
       "      <td>tx</td>\n",
       "      <td>NaN</td>\n",
       "      <td>light</td>\n",
       "      <td>7200.0</td>\n",
       "      <td>1-2 hrs</td>\n",
       "      <td>1949 Lackland AFB&amp;#44 TX.  Lights racing acros...</td>\n",
       "      <td>12/16/2005</td>\n",
       "      <td>29.384210</td>\n",
       "      <td>-98.581082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10/10/1955 17:00</td>\n",
       "      <td>chester (uk/england)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>gb</td>\n",
       "      <td>circle</td>\n",
       "      <td>20.0</td>\n",
       "      <td>20 seconds</td>\n",
       "      <td>Green/Orange circular disc over Chester&amp;#44 En...</td>\n",
       "      <td>1/21/2008</td>\n",
       "      <td>53.200000</td>\n",
       "      <td>-2.916667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10/10/1956 21:00</td>\n",
       "      <td>edna</td>\n",
       "      <td>tx</td>\n",
       "      <td>us</td>\n",
       "      <td>circle</td>\n",
       "      <td>20.0</td>\n",
       "      <td>1/2 hour</td>\n",
       "      <td>My older brother and twin sister were leaving ...</td>\n",
       "      <td>1/17/2004</td>\n",
       "      <td>28.978333</td>\n",
       "      <td>-96.645833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>10/10/1960 20:00</td>\n",
       "      <td>kaneohe</td>\n",
       "      <td>hi</td>\n",
       "      <td>us</td>\n",
       "      <td>light</td>\n",
       "      <td>900.0</td>\n",
       "      <td>15 minutes</td>\n",
       "      <td>AS a Marine 1st Lt. flying an FJ4B fighter/att...</td>\n",
       "      <td>1/22/2004</td>\n",
       "      <td>21.418056</td>\n",
       "      <td>-157.803611</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           datetime                  city state country     shape  \\\n",
       "0  10/10/1949 20:30            san marcos    tx      us  cylinder   \n",
       "1  10/10/1949 21:00          lackland afb    tx     NaN     light   \n",
       "2  10/10/1955 17:00  chester (uk/england)   NaN      gb    circle   \n",
       "3  10/10/1956 21:00                  edna    tx      us    circle   \n",
       "4  10/10/1960 20:00               kaneohe    hi      us     light   \n",
       "\n",
       "   duration (seconds) duration (hours/min)  \\\n",
       "0              2700.0           45 minutes   \n",
       "1              7200.0              1-2 hrs   \n",
       "2                20.0           20 seconds   \n",
       "3                20.0             1/2 hour   \n",
       "4               900.0           15 minutes   \n",
       "\n",
       "                                            comments date posted   latitude  \\\n",
       "0  This event took place in early fall around 194...   4/27/2004  29.883056   \n",
       "1  1949 Lackland AFB&#44 TX.  Lights racing acros...  12/16/2005  29.384210   \n",
       "2  Green/Orange circular disc over Chester&#44 En...   1/21/2008  53.200000   \n",
       "3  My older brother and twin sister were leaving ...   1/17/2004  28.978333   \n",
       "4  AS a Marine 1st Lt. flying an FJ4B fighter/att...   1/22/2004  21.418056   \n",
       "\n",
       "    longitude  \n",
       "0  -97.941111  \n",
       "1  -98.581082  \n",
       "2   -2.916667  \n",
       "3  -96.645833  \n",
       "4 -157.803611  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "ufos = pd.read_csv('./data/ufos.csv')\n",
    "ufos.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})\n",
    "\n",
    "ufos.Country.unique()\n",
    "\n",
    "# 0 au, 1 ca, 2 de, 3 gb, 4 us"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 25863 entries, 2 to 80330\n",
      "Data columns (total 4 columns):\n",
      " #   Column     Non-Null Count  Dtype  \n",
      "---  ------     --------------  -----  \n",
      " 0   Seconds    25863 non-null  float64\n",
      " 1   Country    25863 non-null  object \n",
      " 2   Latitude   25863 non-null  float64\n",
      " 3   Longitude  25863 non-null  float64\n",
      "dtypes: float64(3), object(1)\n",
      "memory usage: 1010.3+ KB\n"
     ]
    }
   ],
   "source": [
    "ufos.dropna(inplace=True)\n",
    "\n",
    "ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]\n",
    "\n",
    "ufos.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Seconds</th>\n",
       "      <th>Country</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20.0</td>\n",
       "      <td>3</td>\n",
       "      <td>53.200000</td>\n",
       "      <td>-2.916667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20.0</td>\n",
       "      <td>4</td>\n",
       "      <td>28.978333</td>\n",
       "      <td>-96.645833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>30.0</td>\n",
       "      <td>4</td>\n",
       "      <td>35.823889</td>\n",
       "      <td>-80.253611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>60.0</td>\n",
       "      <td>4</td>\n",
       "      <td>45.582778</td>\n",
       "      <td>-122.352222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3</td>\n",
       "      <td>51.783333</td>\n",
       "      <td>-0.783333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Seconds  Country   Latitude   Longitude\n",
       "2      20.0        3  53.200000   -2.916667\n",
       "3      20.0        4  28.978333  -96.645833\n",
       "14     30.0        4  35.823889  -80.253611\n",
       "23     60.0        4  45.582778 -122.352222\n",
       "24      3.0        3  51.783333   -0.783333"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])\n",
    "\n",
    "ufos.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "Selected_features = ['Seconds','Latitude','Longitude']\n",
    "\n",
    "X = ufos[Selected_features]\n",
    "y = ufos['Country']\n",
    "\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00        41\n",
      "           1       0.82      0.22      0.35       250\n",
      "           2       1.00      1.00      1.00         8\n",
      "           3       1.00      1.00      1.00       131\n",
      "           4       0.96      1.00      0.98      4743\n",
      "\n",
      "    accuracy                           0.96      5173\n",
      "   macro avg       0.96      0.84      0.87      5173\n",
      "weighted avg       0.96      0.96      0.95      5173\n",
      "\n",
      "Predicted labels:  [4 4 4 ... 3 4 4]\n",
      "Accuracy:  0.9601778465107288\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\dev_workspace\\gitee\\code-org\\ai-lesson\\.venv\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  n_iter_i = _check_optimize_result(\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report \n",
    "from sklearn.linear_model import LogisticRegression\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n",
    "predictions = model.predict(X_test)\n",
    "\n",
    "print(classification_report(y_test, predictions))\n",
    "print('Predicted labels: ', predictions)\n",
    "print('Accuracy: ', accuracy_score(y_test, predictions))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "e:\\dev_workspace\\gitee\\code-org\\ai-lesson\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py:2739: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "model_filename = 'ufo-model.pkl'\n",
    "pickle.dump(model, open(model_filename,'wb'))\n",
    "\n",
    "model = pickle.load(open('ufo-model.pkl','rb'))\n",
    "print(model.predict([[50,44,-12]]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  },
  "metadata": {
   "interpreter": {
    "hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
   }
  },
  "orig_nbformat": 2
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
